import requests
import pymysql
from lxml import etree
# Connection settings for the MySQL database the scraped rows are written to.
db_settings = {
    'host': 'localhost',
    'user': 'root',
    'password': '123456',
    'database': 'db1',
    'charset': 'utf8',
}
conn = pymysql.connect(**db_settings)
# Create the cursor used to execute statements on this connection.
cursor = conn.cursor()
def douban():
    """Fetch and parse data (stub).

    Currently this only binds the paginated Top 250 URL template to a local
    name; it performs no request and implicitly returns ``None``.
    """
    # The crawler as written is very easy for the site to recognise.
    url_template = 'https://movie.douban.com/top250?start={}&filter='


# Crawl the ten result pages of the Douban Top 250 list and insert one row per
# movie into the `doubanmovie_3` table, using the module-level `conn` and
# `cursor` objects.

# A bare crawler is easily recognised, so disguise the request as a browser.
# The header value must be the plain UA string, without a "user-agent:" prefix.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                  "(KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36 Edg/127.0.0.0"
}

# Placeholders let the driver escape the values, so quotes inside a title or
# a quote line can neither break the statement nor inject SQL.
insert_sql = (
    "INSERT INTO doubanmovie_3 (排名,电影名,评分,评价人数,简介,url,图片) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s)"
)

for i in range(10):
    url = f'https://movie.douban.com/top250?start={i * 25}&filter='

    # Download and parse each page exactly once; every entry on the page is
    # then read from the same parsed tree.
    try:
        res = requests.get(url, headers=header, timeout=10)
    except requests.RequestException as exc:
        print("抓取数据失败！", exc)
        continue

    if res.status_code != 200:
        # Do not try to parse an error page; move on to the next one.
        print("抓取数据失败！")
        continue

    root = etree.HTML(res.text)

    # Iterate over the entries actually present instead of assuming 25.
    for item in root.xpath("/html/body/div[3]/div[1]/div/div[1]/ol/li"):
        # These fields are required: a missing one raises IndexError.
        title = item.xpath("div/div[2]/div[1]/a/span[1]/text()")[0]
        link = item.xpath("div/div[2]/div[1]/a/@href")[0]  # detail page url
        image = item.xpath("div/div[1]/a/img/@src")[0]  # poster image
        rating = item.xpath("div/div[2]/div[2]/div/span[2]/text()")[0]  # score
        votes = item.xpath(
            "div/div[2]/div[2]/div/span[4]/text()"
        )[0].replace('人评价', '')  # number of ratings
        # The one-line quote is optional; fall back to a placeholder.
        quote = item.xpath("div/div[2]/div[2]/p[2]/span/text()")
        quote = quote[0] if quote else '_'
        rank = item.xpath("div/div[1]/em/text()")[0]  # position in the list

        print(rating)
        print(votes)
        print([rank, title, rating, votes, link, quote, image])

        # lxml returns str subclasses from XPath; hand plain str to the driver.
        # Order matches the column list in `insert_sql`.
        row = tuple(
            str(value)
            for value in (rank, title, rating, votes, quote, link, image)
        )
        try:
            cursor.execute(insert_sql, row)
            conn.commit()
            print(cursor.rowcount, '条数据插入成功！')
        except pymysql.MySQLError as exc:
            # Undo the failed statement and report why, then keep going.
            conn.rollback()
            print('错误', exc)
